# Author    : CoffeeChicken
# Date      : 2020-10-07 21:52
# Function  : Demo of extracting data from web pages with BS4 (BeautifulSoup)

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


# Index page that lists the chapters of the book to scrape.
url = "https://www.shicimingju.com/book/sanguoyanyi.html"
# Site root that chapter links are resolved against.
base_url = "https://www.shicimingju.com/"
# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script forever.
timeout = 10

headers = {
    'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/85.0.4183.121 Safari/537.36"
}
# Fetch the index page. `headers` must be passed by keyword: the second
# positional parameter of requests.get() is `params`, which would send the
# User-Agent as a query string instead of as an HTTP header.
page_text = requests.get(url, headers=headers, timeout=timeout)
# Fail loudly on an HTTP error instead of parsing an error page.
page_text.raise_for_status()
soup = BeautifulSoup(page_text.content, 'lxml')
print(soup)
li_list = soup.select(".book-mulu > ul > li")
for li in li_list:
    # Skip list entries that carry no usable link.
    link = li.a
    if link is None or not link.get('href'):
        continue
    # urljoin handles both "/book/..." and "book/..." hrefs without
    # producing a double slash after the host name.
    title_url = urljoin(base_url, link['href'])
    print(title_url)
    # Fetch the chapter page.
    content_text = requests.get(title_url, headers=headers, timeout=timeout)
    content_text.raise_for_status()
    # Parse the chapter page and print each paragraph of its body.
    content_soup = BeautifulSoup(content_text.content, "lxml")
    page_content = content_soup.select(".chapter_content p")
    for item in page_content:
        # get_text() also covers paragraphs with nested tags, for which
        # `.string` is None.
        print(item.get_text())


